from pyspark import SparkConf, SparkContext

if __name__ == '__main__':
    # Word-count example: read a text file, split each line into words,
    # and print how many times each word occurs.
    conf = SparkConf().setAppName("test").setMaster("local[*]")
    sc = SparkContext(conf=conf)

    try:
        file_rdd = sc.textFile("../data/input/words.txt")

        # split() with no argument splits on any run of whitespace and drops
        # empty tokens, so blank lines and repeated spaces do not produce a
        # bogus "" word in the counts (split(" ") would).
        word_rdd = file_rdd.flatMap(lambda line: line.split())

        # Pair every word with an initial count of 1.
        word_with_one_rdd = word_rdd.map(lambda word: (word, 1))

        # reduceByKey merges the values of records that share the same key;
        # here it sums the 1s to get the per-word total.
        result_rdd = word_with_one_rdd.reduceByKey(lambda x, y: x + y)

        print(result_rdd.collect())
    finally:
        # Always shut the context down, even if a step above raised.
        sc.stop()
